#Creation of Study Data Set 
#by Justin Rodgers & Jihoon Song
#March 2014

# Start from an empty workspace so objects left over from an earlier session
# cannot leak into the build. NOTE: this deletes every object in the global
# environment of whoever runs the script.
rm(list = ls())

# Folder that holds the raw CSV files; all read.csv() calls below use paths
# relative to it. Set the STUDY_DATA_DIR environment variable to run the
# script on another machine; when it is unset the original location is used.
data_dir <- Sys.getenv(
  "STUDY_DATA_DIR",
  unset = "C:/Users/Jihoon/Desktop/_Classes/2014S_Calsses/GOV 2001 Advanced Quantitative Research Methodology/03_Replication/NewData/NewData"
)
if (!dir.exists(data_dir)) {
  stop("Data directory not found: ", data_dir,
       " (set STUDY_DATA_DIR to the folder containing the CSV files)",
       call. = FALSE)
}
setwd(data_dir)

#Import datasets & combine cohorts 12 & 15 for each type of dataset
#Each *12 / *15 pair is read from its own file and stacked with rbind(), which
#requires both files to have the same columns.
#Individual-level variable datasets (all from Wave 1)
#Sex, age, race, etc.
demo12 <- read.csv("demo1_12.csv")
demo15 <- read.csv("demo1_15.csv")
demo <- rbind(demo12, demo15)
#Number of siblings, length of family residence
fshh12 <- read.csv("fshh1_12.csv")
fshh15 <- read.csv("fshh1_15.csv")
fshh <- rbind(fshh12, fshh15)
#Depression (Youth Self Report)
depr12 <- read.csv("ysr1_12.csv")
depr15 <- read.csv("ysr1_15.csv")
depr <- rbind(depr12, depr15)
#Emotionality, Impulsivity, & Sociability (EASI Temperament Survey)
easi12 <- read.csv("easi1_12.csv")
easi15 <- read.csv("easi1_15.csv")
easi <- rbind(easi12, easi15)
#Substance Use (Subu)
subu12 <- read.csv("subu1_12.csv")
subu15 <- read.csv("subu1_15.csv")
subu <- rbind(subu12, subu15)
#Exposure to suicidal behavior (fmhh files)
#Fixed: cohort 12 used to be read from "fmhh1_15.csv", so cohort 15 was
#stacked twice and cohort 12 was missing from fmhh. The file name below
#follows the <name>1_12.csv pattern used by every other dataset.
fmhh12 <- read.csv("fmhh1_12.csv")
fmhh15 <- read.csv("fmhh1_15.csv")
fmhh <- rbind(fmhh12, fmhh15)
#Family attachment & support (PSRS)
psrs12 <- read.csv("psrs1_12.csv")
psrs15 <- read.csv("psrs1_15.csv")
psrs <- rbind(psrs12, psrs15)
#Other depression dataset (CBCL files)
cbl12 <- read.csv("cbcl1_12.csv")
cbl15 <- read.csv("cbcl1_15.csv")
cbl <- rbind(cbl12, cbl15)
#Import interview dates
dates <- read.csv("interview_dates.csv")

#Neighborhood measures datasets
#Neighborhood-level Community Survey data (source of the concentrated
#poverty and immigrant concentration measures)
commn <- read.csv("CommunityLevelData.csv")
#Expose the neighborhood id under the name "nc": copy it, then drop the
#original NC_NUM column
commn[["nc"]] <- commn[["NC_NUM"]]
commn[["NC_NUM"]] <- NULL

#Individual-level data used below for the covariates and for the
#subid -> nc lookup
commi <- read.csv("master1.csv")
#Keep only cohorts 12 & 15; rows with a missing cohort are dropped
commi <- commi[commi$cohort %in% c(12, 15), ]

#Community Survey responses (used for the collective efficacy items)
CS <- read.csv("CommSurvey.csv")

#Outcome variable (suicide) datasets (from Wave 2)
#Read the cohort 12 and cohort 15 files and stack them in that order; no
#per-cohort objects are left behind in the workspace.
suic <- do.call(rbind, lapply(c("suic2_12.csv", "suic2_15.csv"), read.csv))

#Individual-level covariates built from commi (master file) and demo.
#attach() is no longer used: every column is referenced through its data
#frame, so the result cannot depend on what else is on the search path or on
#a stale attached copy of the data.
stopifnot("sex" %in% names(commi))

#Female variable: 1 where sex == 0, 0 where sex == 1, NA otherwise
commi$female[commi$sex == 0] <- 1
commi$female[commi$sex == 1] <- 0

#Age variable
commi$age <- commi$AGE1

#Mom's Education (1-5)
commi$mom_edu <- commi$EDUC_MOM

#Number of siblings
commi$sibs <- commi$NSIBU19

#Length at family residence (in years, so use round() function)
commi$res_yrs <- round(commi$DB1A0)
#commi$res_yrs <- round(commi$DB4_1)


#Hispanic variable: 1 where DB26 == 1, 0 where DB26 == 0, NA otherwise
#NOTE(review): the original comment described the rule as "DB26==3, then
#Hispanic==1", which does not match the code - confirm against the codebook.
demo$hispanic[demo$DB26 == 1] <- 1
demo$hispanic[demo$DB26 == 0] <- 0

#Black variable (DB27 == 3, then black == 1; any other non-missing value -> 0)
demo$black[demo$DB27 == 3] <- 1
demo$black[demo$DB27 < 3 | demo$DB27 > 3] <- 0

#Two-parent household: 1 where famstruc < 3, 0 where famstruc >= 3
commi$two_parents[commi$famstruc < 3] <- 1
commi$two_parents[commi$famstruc >= 3] <- 0

#Add what variables we have to the 'final' study dataset.
#Left join: every commi respondent is kept even without a demo record.
final <- commi[c("nc", "subid", "cohort", "female", "age", "mom_edu", "sibs", "res_yrs", "two_parents")]
demo_vars <- demo[c("subid", "hispanic", "black")]
final <- merge(final, demo_vars, by = "subid", all.x = TRUE)

#Two-parent household (1=2 parent household)

#fshh$two_parents[fshh$FB5_1==1 | fshh$FB15_1==1] <- 1
#fshh$two_parents[fshh$FB5_1==0 | fshh$FB15_1==0 | 
#                   fshh$FB4_1>=2 | fshh$FB14_1>=2 |
#                   fshh$FB4_2>=1 | fshh$FB14_2>=1 |
#                   fshh$FB5_2>=1 | fshh$FB15_2>=1 |
#                   fshh$FB1A0>=2 | fshh$FB1B0>=2] <- 0

#fshh_vars <- fshh[c("subid", "two_parents")]

#add two_parents variable to final dataset
#final <- merge(final, fshh_vars, by="subid", all.x=TRUE)



#Depression variable (Need to buy Manual for the Youth Self Report to get rest of items)
#"Child Behavior Check list (Achenbach 1991)

items <- c("YS12", "YS14", "YS18", "YS31", "YS32", "YS33", "YS34", "YS35", "YS45", "YS50",
           "YS52", "YS71", "YS89", "YS91", "YS103", "YS112")

#Reliability check for the item set. cronbach() is not part of base R and no
#package is loaded in this script, so the check only runs when a function of
#that name is available in the session; otherwise the build continues
#instead of stopping on a purely diagnostic call.
if (exists("cronbach", mode = "function")) {
  cronbach(depr[, items])
} else {
  message("cronbach() is not available; skipping the reliability check for the depression items.")
}

#Depression score = sum of the items above.
#NOTE(review): with na.rm = TRUE a respondent with every item missing gets a
#score of 0 rather than NA - confirm this is intended.
depr$depression <- rowSums(depr[items], na.rm = TRUE)
depr <- depr[c("subid", "depression")]
#Inner join: respondents without a depression record are dropped from final
final <- merge(final, depr, by = "subid")


#Temperament measures (EASI): each one is copied from a single EASI column
#  emotionality <- EASI_7, impulsivity <- EASI_1, sociability <- EASI_8
#NOTE(review): presumably these columns already hold the scale averages
#described in the survey documentation - confirm.
easi[c("emotionality", "impulsivity", "sociability")] <-
  easi[c("EASI_7", "EASI_1", "EASI_8")]

#Substance abuse = sum of three 0/1 flags (cigarettes, drunk, marijuana)
#Build a 0/1 flag from two logical masks: 0 where zero_mask is TRUE, then 1
#where one_mask is TRUE (the 1 wins when both hold); everything else is NA.
flag_from_masks <- function(zero_mask, one_mask) {
  flag <- rep(NA_real_, length(zero_mask))
  flag[zero_mask %in% TRUE] <- 0
  flag[one_mask %in% TRUE] <- 1
  flag
}

#drunk: based on SV3G0
subu$drunk <- flag_from_masks(subu$SV3G0 == 0, subu$SV3G0 > 0)

#marijuana: SV7A0 = ever, SV7D0 = past 12 months
subu$marijo <- flag_from_masks(subu$SV7A0 == 0 | subu$SV7D0 == 0,
                               subu$SV7D0 > 0)

#cigs: SV1A0 / SV1D0, same pattern as marijuana
subu$cigs <- flag_from_masks(subu$SV1A0 == 0 | subu$SV1D0 == 0,
                             subu$SV1D0 > 0)

#sum them up (missing flags count as 0):
sub_items <- c("cigs", "drunk", "marijo")
subu$sub_abuse <- rowSums(subu[sub_items], na.rm = TRUE)

final2 <- final #backup taken before the merges below

#Full outer joins: respondents present in only one of the tables are kept
easi2 <- easi[c("subid", "emotionality", "impulsivity", "sociability")]
final <- merge(final, easi2, by = "subid", all = TRUE)

sub <- subu[c("subid", "sub_abuse")]
final <- merge(final, sub, by = "subid", all = TRUE)

final2 <- final #backup refreshed after the merges

#Family attachment and support - average of the z-scored items
#(PSS4 PSS7 PSS8 PSS10 PSS11 PSS14)
#All reverse-coded, except for PSS7

#Reverse a 3-point item: 1 <-> 3, 2 unchanged. Values outside 1..3 (NA or any
#other code) are left exactly as they are.
#Fixed: the previous sequential recode (3 -> 1, then 1 -> 3) sent every
#original 3 back to 3 and every original 1 to 3 as well, so both ended up as
#3. Flipping with 4 - x in one step avoids that.
#NOTE(review): assumes the items are stored as numbers 1-3 - confirm.
reverse_3pt <- function(x) {
  flip <- x %in% c(1, 3)
  x[flip] <- 4 - x[flip]
  x
}

for (item in c("PSS4", "PSS8", "PSS10", "PSS11", "PSS14")) {
  psrs[[item]] <- reverse_3pt(psrs[[item]])
}

#z-score every item; as.numeric() stores a plain vector instead of the
#one-column matrix that scale() returns
for (item in c("PSS4", "PSS7", "PSS8", "PSS10", "PSS11", "PSS14")) {
  psrs[[paste0(item, "z")]] <- as.numeric(scale(psrs[[item]], center = TRUE, scale = TRUE))
}

#Average of the six z-scores.
#NOTE(review): the sum is always divided by 6 and missing items are skipped,
#so a missing item counts as 0 (the item mean) and a respondent with every
#item missing gets 0 - confirm this is intended.
fam_items <- c("PSS4z", "PSS7z", "PSS8z", "PSS10z", "PSS11z", "PSS14z")
psrs$fam_attach <- (rowSums(psrs[fam_items], na.rm = TRUE)) / 6
summary(psrs$fam_attach)
sd(psrs$fam_attach)
#The SD is not expected to be 1: each item has SD 1, but the average of
#several z-scores has SD 1 only if the items are perfectly correlated;
#otherwise it is smaller.

psrs2 <- psrs[c("subid", "fam_attach")]
final <- merge(final, psrs2, by = "subid", all = TRUE)


#Exposure to suicidal behavior (taken from the suic datasets, item FT19)
#Earlier approach using fmhh, kept for reference:
#fmhh$exp_suic <- fmhh$FM8
#fmhh2 <- fmhh[c("subid", "exp_suic")]
#final <- merge(final, fmhh2, by="subid", all=T)

suic2 <- setNames(suic[c("subid", "FT19")], c("subid", "exp_suic"))
final <- merge(final, suic2, by = "subid", all = TRUE)


## ** OUTCOME  VARIABLE (Suicide Attempt) ** ##

#The outcome is item FT8; from here on suic holds only subid and suicide
suic <- setNames(suic[c("subid", "FT8")], c("subid", "suicide"))
table(suic$suicide, useNA = "always")

#Inner join: only respondents with a row in the suic data are kept
final3 <- merge(final, suic, by = "subid")

#Analysis columns, in the order used downstream; rows with any missing
#value among them are dropped
analysis_cols <- c("subid", "nc", "suicide", "female", "age", "hispanic", "black",
                   "mom_edu", "sibs", "depression", "emotionality", "impulsivity",
                   "sociability", "sub_abuse", "fam_attach", "res_yrs",
                   "two_parents", "exp_suic")
final4 <- na.omit(final3[analysis_cols])


## **NEIGHBORHOOD - LEVEL VARIABLES** ##


#Concentrated poverty (CPOV90) and immigrant concentration (CFORBORN) are
#already present in the neighborhood-level file; just inspect them
summary(commn[["CPOV90"]], na.rm = TRUE)
sd(commn[["CPOV90"]], na.rm = TRUE)
summary(commn[["CFORBORN"]])
#CSAME

nc_vars <- commn[c("nc", "CPOV90", "CFORBORN")]

#Full outer join on neighborhood id, then drop every row with a missing
#value (this removes neighborhoods that have no respondents, and respondents
#whose neighborhood has no measures)
final5 <- na.omit(merge(final4, nc_vars, by = "nc", all = TRUE))





#Residential Stability
#eightyfive = interview year minus DB1A0 (the same column used for
#length of residence above); a respondent is flagged stable (1) when that
#value falls before 1986 and 0 otherwise. Missing inputs give NA.
res <- merge(demo, dates, by = "subid")[c("subid", "DB1A0", "interview_year")]
res$eightyfive <- res$interview_year - res$DB1A0
res$stability[res$eightyfive < 1986.00] <- 1
res$stability[res$eightyfive >= 1986.00] <- 0

#Aggregate to NC-level: share of stable respondents per neighborhood
comm2 <- commi[c("subid", "nc")]
comm2 <- merge(comm2, res, by = "subid", all = TRUE)

#Fixed: the call used to pass na.action = na.omit. In this (non-formula) form
#of aggregate() extra arguments are handed to FUN, and mean() ignores
#na.action, so any neighborhood with a single missing stability value got
#NA. na.rm = TRUE drops the missing values before averaging; a neighborhood
#with no observed values yields NaN.
stability_percent <- aggregate(comm2$stability, list(nc = comm2$nc), mean, na.rm = TRUE)
names(stability_percent)[names(stability_percent) == "x"] <- "CSTABILITY"
comm2 <- merge(comm2, stability_percent, by = "nc")
comm2 <- comm2[c("subid", "CSTABILITY")]
#Add to final dataset
final5 <- merge(final5, comm2, by = "subid")



#Collective Efficacy
#Scaled Average

#Cohesion (Q11B, Q11E, Q11F, Q11K, Q11M)
#Their social capital summary variable: CS$CSCAPITL (mean of 3.53)

cs <- CS
cs$subid <- CS$RC_NUM

#Recode one 5-point survey item.
#  x       : raw item (converted with as.numeric())
#  reverse : TRUE  -> 1..5 become 5..1
#            FALSE -> 1..5 are kept as they are
#The code -96 is mapped to the midpoint 3 (presumably a "don't know"-type
#answer - confirm against the codebook). Every other value becomes NA.
recode_likert5 <- function(x, reverse = TRUE) {
  x <- as.numeric(x)
  out <- rep(NA_real_, length(x))
  valid <- x %in% 1:5
  out[valid] <- if (reverse) 6 - x[valid] else x[valid]
  out[x %in% -96] <- 3
  out
}

#One row per constructed item: new column name, source column, and whether
#the source is reverse-coded.
#Fixes relative to the previous line-by-line recodes:
# * the social capital (Q11D/G/N/P/T) and social control (Q12A/B/C/E/F)
#   items tested -96 on Q11B or Q11E instead of on their own source column,
#   which also overwrote valid answers of 1 or 2 with 3;
# * q11f and q11k were plain numeric copies, so -96 (and any other
#   out-of-range code) went into the scale as a real value. They are still
#   NOT reversed, but now get the same -96 / NA handling as every other item.
#   NOTE(review): presumably Q11F and Q11K are the negatively worded items,
#   which is why they are not reversed - confirm.
ce_items <- data.frame(
  new = c("q11b", "q11e", "q11f", "q11k", "q11m",
          "q11d", "q11g", "q11n", "q11p", "q11t",
          "q11d2", "q11g2", "q11n2", "q11p2", "q11t2"),
  src = c("Q11B", "Q11E", "Q11F", "Q11K", "Q11M",
          "Q11D", "Q11G", "Q11N", "Q11P", "Q11T",
          "Q12A", "Q12B", "Q12C", "Q12E", "Q12F"),
  reverse = c(TRUE, TRUE, FALSE, FALSE, TRUE,
              TRUE, TRUE, TRUE, TRUE, TRUE,
              TRUE, TRUE, TRUE, TRUE, TRUE),
  stringsAsFactors = FALSE
)
stopifnot(all(ce_items$src %in% names(cs)))

#Create <new> (recoded item) and <new>_s (z-score of the recoded item)
for (i in seq_len(nrow(ce_items))) {
  new_name <- ce_items$new[i]
  cs[[new_name]] <- recode_likert5(cs[[ce_items$src[i]]], reverse = ce_items$reverse[i])
  cs[[paste0(new_name, "_s")]] <- as.numeric(scale(cs[[new_name]], center = TRUE, scale = TRUE))
}

#Average of a set of z-scored columns. As before, the sum is divided by the
#number of items and missing items are skipped, so a missing item counts as
#0 (the item mean).
scale_average <- function(data, cols) {
  rowSums(data[cols], na.rm = TRUE) / length(cols)
}

#Cohesion: average of the scaled cohesion items
cohesion_items <- c("q11e_s", "q11b_s", "q11f_s", "q11k_s", "q11m_s")
cs$cohesion <- scale_average(cs, cohesion_items)
summary(cs$cohesion)
sd(cs$cohesion)

#Social Capital items (referred to Intergenerational/Informal Social Control by authors)
social_capital_items <- c("q11d_s", "q11g_s", "q11n_s", "q11p_s", "q11t_s")
cs$social_capital <- scale_average(cs, social_capital_items)
summary(cs$social_capital)
sd(cs$social_capital)

# Informal Social Control items: Q12A Q12B Q12C Q12E Q12F
#non_scaled_social_items <- c("q11d2", "q11g2", "q11n2", "q11p2", "q11t2")
#cs$control <- (rowSums(cs[non_scaled_social_items], na.rm=TRUE))/5
social_control_items <- c("q11d2_s", "q11g2_s", "q11n2_s", "q11p2_s", "q11t2_s")
cs$social_control <- scale_average(cs, social_control_items)
summary(cs$social_control)
sd(cs$social_control)


#Collective efficacy: cohesion items + social capital items
collective <- c("q11e_s", "q11b_s", "q11f_s", "q11k_s", "q11m_s",
                "q11d_s", "q11g_s", "q11n_s", "q11p_s", "q11t_s")
cs$collective <- scale_average(cs, collective)
summary(cs$collective)
sd(cs$collective)


#Collective Efficacy 2 (traditional) measure: cohesion items + informal
#social control items
collective2 <- c("q11e_s", "q11b_s", "q11f_s", "q11k_s", "q11m_s",
                 "q11d2_s", "q11g2_s", "q11n2_s", "q11p2_s", "q11t2_s")
cs$collective2 <- scale_average(cs, collective2)
summary(cs$collective2)
sd(cs$collective2)


#Aggregate to NC-level    **cs dataset uses CS$NC_NUM**

#subid -> nc lookups. None of them is used in the code visible here; they
#are kept only so that any later code referring to them keeps working.
comm3 <- comm32 <- comm33 <- comm34 <- comm35 <- commi[c("subid", "nc")]

#Two-column frame (nc, <var>) taken from the survey data, with NC_NUM
#exposed under the name "nc".
nc_slice <- function(data, var) {
  out <- data[c("NC_NUM", var)]
  names(out) <- c("nc", var)
  out
}

#Neighborhood mean of <var>, returned as a frame with columns nc and <var>.
#Fixed: the calls used to pass na.action = na.omit, which this (non-formula)
#form of aggregate() hands to mean(), where it is ignored; na.rm = TRUE is
#the argument that actually drops missing values. Each measure is also
#grouped by the nc column of its own frame now (social_control,
#social_capital and cohesion used to borrow cs$nc / cs2$nc).
nc_mean <- function(data, var) {
  out <- aggregate(data[[var]], list(nc = data$nc), mean, na.rm = TRUE)
  names(out)[names(out) == "x"] <- var
  out
}

cs2 <- nc_slice(cs, "collective2")
cs3 <- nc_slice(cs, "social_control")
cs4 <- nc_slice(cs, "social_capital")
cs5 <- nc_slice(cs, "cohesion")
#Done last because it replaces the full survey frame with the two-column one
cs <- nc_slice(cs, "collective")

collective_efficacy <- nc_mean(cs, "collective")
collective_efficacy2 <- nc_mean(cs2, "collective2")
social_control <- nc_mean(cs3, "social_control")
social_capital <- nc_mean(cs4, "social_capital")
cohesion <- nc_mean(cs5, "cohesion")

#Add to final dataset (inner joins on neighborhood id)
final5 <- merge(final5, collective_efficacy, by = "nc")
final5 <- merge(final5, collective_efficacy2, by = "nc")
final5 <- merge(final5, social_control, by = "nc")
final5 <- merge(final5, social_capital, by = "nc")
final5 <- merge(final5, cohesion, by = "nc")

#Centering all predictor variables
#Every predictor gets a companion column <name>_c holding the variable minus
#its mean over final5 (grand-mean centering, no rescaling). scale() returns
#a one-column matrix, which is stored as-is.

individual_predictors <- c("female", "age", "hispanic", "black", "two_parents",
                           "mom_edu", "sibs", "res_yrs", "depression",
                           "emotionality", "impulsivity", "sociability",
                           "sub_abuse", "fam_attach", "exp_suic")

#Center neighborhood-level predictors?
neighborhood_predictors <- c("CPOV90", "CFORBORN", "CSTABILITY", "collective",
                             "collective2", "social_control", "social_capital",
                             "cohesion")

for (predictor in c(individual_predictors, neighborhood_predictors)) {
  final5[[paste0(predictor, "_c")]] <- scale(final5[[predictor]], center = TRUE, scale = FALSE)
}




#write.csv(final5, file="~/study_dataset.csv")

#Load Justin's version of the final dataset so it can be compared with final5
final_com_path <- "C:/Users/Jihoon/Desktop/_Classes/2014S_Calsses/GOV 2001 Advanced Quantitative Research Methodology/03_Replication/FinalProjectDataSet.csv"
final_com <- read.csv(final_com_path)




...

# *End of Dataset Preparation* #
